###############################################################################
# The code in this file is used to sample datasets
###############################################################################

library(plotly)
library(tidyverse)
library(readxl)
library(dplyr)
library ('plyr')
library(ggplot2)
require("knitr")
library(janitor)
library(ggpubr)

###setwd("C:/XXX")
# Load the raw evaluation export and reduce it to a single row per
# (caseID, raterID, subjectID) combination with recoded rating columns.
gdata <- read.csv("eval_sun_smith.csv") %>%
  # Drop rows whose performance is the literal string "NULL" or the value 6.
  # NOTE(review): 6 presumably encodes a non-rating -- confirm with the
  # codebook.
  filter(performance != "NULL", performance != 6) %>%
  mutate(
    # Supervision codes 1-4 mapped onto the numeric values 1-4.
    supervision_r = dplyr::recode(
      supervision, "1" = 1, "2" = 2, "3" = 3, "4" = 4
    ),
    # Performance scale reversed: 1 <-> 5, 2 <-> 4, 3 stays 3.
    performance_r = dplyr::recode(
      performance, "1" = 5, "2" = 4, "3" = 3, "4" = 2, "5" = 1
    ),
    # Rater role encoded as "1" (Attending) or "2" (Trainee).
    raterRole_r = dplyr::recode(raterRole, "Attending" = "1", "Trainee" = "2")
  ) %>%
  # Only completed, paired evaluations with complexity "2" are kept.
  filter(status == "COMPLETE", paired == "TRUE", complexity == "2") %>%
  # Keep the first record for each case / rater / subject combination.
  group_by(caseID, raterID, subjectID) %>%
  dplyr::slice(1) %>%
  ungroup() %>%
  select(
    caseID, subjectID, raterID, procID,
    raterRole_r, supervision_r, performance_r, traineePGY
  )


# Split the cleaned ratings by rater role: "1" = Attending, "2" = Trainee.
faculty <- filter(gdata, raterRole_r == "1")
trainee <- filter(gdata, raterRole_r == "2")

# Put the two ratings of the same case and procedure side by side. A faculty
# row is matched to the trainee row whose raterID equals the faculty row's
# subjectID (presumably the trainee's self-rating -- confirm). Columns from
# the faculty row get the suffix ".x", columns from the trainee row ".y".
gdata_wide <- merge(
  x = faculty,
  y = trainee,
  by.x = c("caseID", "procID", "subjectID"),
  by.y = c("caseID", "procID", "raterID")
)

# The trainee-side subject and PGY columns are not used downstream.
gdata_wide <- select(gdata_wide, -subjectID.y, -traineePGY.y)

###############  For the PGY 1 Faculty rating #########################
# Paired ratings whose faculty-side record has traineePGY equal to 1.
PGY1 <- filter(gdata_wide, traineePGY.x == "1")

### number of distinct procedures present at this PGY level
n_distinct(PGY1$procID)

### for each procedure, count the distinct trainees (subjectID) who have at
### least one paired rating on it; most widely covered procedure first
ProPerTrainee1 <- PGY1 %>%
  group_by(procID) %>%
  dplyr::summarise(n = n_distinct(subjectID), .groups = "drop") %>%
  arrange(desc(n))

write.csv(ProPerTrainee1, file = "ProPerTrainee1.csv")


#### Create PGY1 datasets: for each triple of procedures, keep the trainees who have a paired rating on all three procedures
###setwd("C:/XXX")
# Candidate procedures: the first 21 rows of ProPerTrainee1, i.e. the
# procedures covered by the largest number of PGY1 trainees.
a <- ProPerTrainee1$procID[1:21]
b <- a
c <- a
n <- 0

# Every index triple i < j < k, one per column. combn() emits them in
# lexicographic order, which is the order three nested loops restricted to
# i < j < k would visit them, so the numbering of the output files is kept.
triples <- combn(length(a), 3)

for (idx in seq_len(ncol(triples))) {
  i <- triples[1, idx]
  j <- triples[2, idx]
  k <- triples[3, idx]

  # First paired rating per (trainee, procedure) for the three selected
  # procedures, restricted to trainees who have all three of them.
  dat <- PGY1 %>%
    filter(procID == a[i] | procID == b[j] | procID == c[k]) %>%
    group_by(subjectID, procID) %>%
    filter(row_number() == 1) %>%
    group_by(subjectID) %>%
    filter(n() == 3) %>%
    ungroup()

  # A triple is only exported when it yields at least 150 rows
  # (3 rows per trainee, so at least 50 trainees).
  if (nrow(dat) < 150) {
    next
  }

  # Long format: one block of rows per (rater, item) combination.
  # The score columns are the recoded ones, supervision_r.* and
  # performance_r.*; un-recoded supervision.* columns do not exist here.

  # subset 1: faculty rating (.x), item 1 = supervision
  dat1 <- dat %>%
    filter(raterRole_r.x == "1") %>%
    select(
      subjectID,
      rater = raterRole_r.x,
      procID,
      score = supervision_r.x,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "1", item = "1")

  # subset 2: faculty rating (.x), item 2 = performance
  dat2 <- dat %>%
    filter(raterRole_r.x == "1") %>%
    select(
      subjectID,
      rater = raterRole_r.x,
      procID,
      score = performance_r.x,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "2", item = "2")

  # subset 3: trainee rating (.y), item 1 = supervision
  dat3 <- dat %>%
    filter(raterRole_r.y == "2") %>%
    select(
      subjectID,
      rater = raterRole_r.y,
      procID,
      score = supervision_r.y,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "3", item = "1")

  # subset 4: trainee rating (.y), item 2 = performance
  dat4 <- dat %>%
    filter(raterRole_r.y == "2") %>%
    select(
      subjectID,
      rater = raterRole_r.y,
      procID,
      score = performance_r.y,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "4", item = "2")

  # Files are numbered consecutively in the order qualifying triples occur.
  n <- n + 1
  write.csv(rbind(dat1, dat2, dat3, dat4), paste0("PGY1_", n, ".csv"))
}
    

#######################  For the PGY 2   ################################
# Paired ratings whose faculty-side record has traineePGY equal to 2.
PGY2 <- filter(gdata_wide, traineePGY.x == "2")

### number of distinct procedures present at this PGY level
n_distinct(PGY2$procID)

### for each procedure, count the distinct trainees (subjectID) who have at
### least one paired rating on it; most widely covered procedure first
ProPerTrainee2 <- PGY2 %>%
  group_by(procID) %>%
  dplyr::summarise(n = n_distinct(subjectID), .groups = "drop") %>%
  arrange(desc(n))

write.csv(ProPerTrainee2, file = "ProPerTrainee2.csv")
#### Create PGY2 datasets: for each triple of procedures, keep the trainees who have a paired rating on all three procedures
###setwd("C:/XXX")
# Candidate procedures: the first 23 rows of ProPerTrainee2, i.e. the
# procedures covered by the largest number of PGY2 trainees.
a <- ProPerTrainee2$procID[1:23]
b <- a
c <- a
n <- 0

# Every index triple i < j < k, one per column, in the same lexicographic
# order in which three nested loops restricted to i < j < k visit them.
triples <- combn(length(a), 3)

for (idx in seq_len(ncol(triples))) {
  i <- triples[1, idx]
  j <- triples[2, idx]
  k <- triples[3, idx]

  # First paired rating per (trainee, procedure) for the three selected
  # procedures, restricted to trainees who have all three of them.
  dat <- PGY2 %>%
    filter(procID == a[i] | procID == b[j] | procID == c[k]) %>%
    distinct(subjectID, procID, .keep_all = TRUE) %>%
    group_by(subjectID) %>%
    filter(n() == 3)

  # A triple is only exported when it yields at least 150 rows
  # (3 rows per trainee, so at least 50 trainees).
  if (nrow(dat) < 150) {
    next
  }

  # subset 1: faculty rating (.x), item 1 = supervision
  dat1 <- dat %>%
    filter(raterRole_r.x == "1") %>%
    select(
      subjectID,
      rater = raterRole_r.x,
      procID,
      score = supervision_r.x,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "1") %>%
    mutate(item = "1")

  # subset 2: faculty rating (.x), item 2 = performance
  dat2 <- dat %>%
    filter(raterRole_r.x == "1") %>%
    select(
      subjectID,
      rater = raterRole_r.x,
      procID,
      score = performance_r.x,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "2") %>%
    mutate(item = "2")

  # subset 3: trainee rating (.y), item 1 = supervision
  dat3 <- dat %>%
    filter(raterRole_r.y == "2") %>%
    select(
      subjectID,
      rater = raterRole_r.y,
      procID,
      score = supervision_r.y,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "3") %>%
    mutate(item = "1")

  # subset 4: trainee rating (.y), item 2 = performance
  dat4 <- dat %>%
    filter(raterRole_r.y == "2") %>%
    select(
      subjectID,
      rater = raterRole_r.y,
      procID,
      score = performance_r.y,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "4") %>%
    mutate(item = "2")

  # Files are numbered consecutively in the order qualifying triples occur.
  n <- n + 1
  write.csv(rbind(dat1, dat2, dat3, dat4), paste0("PGY2_", n, ".csv"))
}



#######################  For the PGY 3   ################################
# Paired ratings whose faculty-side record has traineePGY equal to 3.
PGY3 <- filter(gdata_wide, traineePGY.x == "3")

### for each procedure, count the distinct trainees (subjectID) who have at
### least one paired rating on it; most widely covered procedure first
ProPerTrainee3 <- PGY3 %>%
  group_by(procID) %>%
  dplyr::summarise(n = n_distinct(subjectID), .groups = "drop") %>%
  arrange(desc(n))
write.csv(ProPerTrainee3, file = "ProPerTrainee3.csv")
#################### Create PGY3 datasets: for each triple of procedures, keep the trainees who have a paired rating on all three procedures
###setwd("C:/XXX")
# Candidate procedures: the first 37 rows of ProPerTrainee3, i.e. the
# procedures covered by the largest number of PGY3 trainees.
a <- ProPerTrainee3$procID[1:37]
b <- a
c <- a
n <- 0

# Every index triple i < j < k, one per column, in the same lexicographic
# order in which three nested loops restricted to i < j < k visit them.
triples <- combn(length(a), 3)

for (idx in seq_len(ncol(triples))) {
  i <- triples[1, idx]
  j <- triples[2, idx]
  k <- triples[3, idx]

  # First paired rating per (trainee, procedure) for the three selected
  # procedures, restricted to trainees who have all three of them.
  dat <- PGY3 %>%
    filter(procID == a[i] | procID == b[j] | procID == c[k]) %>%
    distinct(subjectID, procID, .keep_all = TRUE) %>%
    group_by(subjectID) %>%
    filter(n() == 3)

  # A triple is only exported when it yields at least 150 rows
  # (3 rows per trainee, so at least 50 trainees).
  if (nrow(dat) < 150) {
    next
  }

  # subset 1: faculty rating (.x), item 1 = supervision
  dat1 <- dat %>%
    filter(raterRole_r.x == "1") %>%
    select(
      subjectID,
      rater = raterRole_r.x,
      procID,
      score = supervision_r.x,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "1") %>%
    mutate(item = "1")

  # subset 2: faculty rating (.x), item 2 = performance
  dat2 <- dat %>%
    filter(raterRole_r.x == "1") %>%
    select(
      subjectID,
      rater = raterRole_r.x,
      procID,
      score = performance_r.x,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "2") %>%
    mutate(item = "2")

  # subset 3: trainee rating (.y), item 1 = supervision
  dat3 <- dat %>%
    filter(raterRole_r.y == "2") %>%
    select(
      subjectID,
      rater = raterRole_r.y,
      procID,
      score = supervision_r.y,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "3") %>%
    mutate(item = "1")

  # subset 4: trainee rating (.y), item 2 = performance
  dat4 <- dat %>%
    filter(raterRole_r.y == "2") %>%
    select(
      subjectID,
      rater = raterRole_r.y,
      procID,
      score = performance_r.y,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "4") %>%
    mutate(item = "2")

  # Files are numbered consecutively in the order qualifying triples occur.
  n <- n + 1
  write.csv(rbind(dat1, dat2, dat3, dat4), paste0("PGY3_", n, ".csv"))
}



#######################  For the PGY 4   ################################
# Paired ratings whose faculty-side record has traineePGY equal to 4.
PGY4 <- filter(gdata_wide, traineePGY.x == "4")

### number of distinct procedures present at this PGY level
n_distinct(PGY4$procID)

### for each procedure, count the distinct trainees (subjectID) who have at
### least one paired rating on it; most widely covered procedure first
ProPerTrainee4 <- PGY4 %>%
  group_by(procID) %>%
  dplyr::summarise(n = n_distinct(subjectID), .groups = "drop") %>%
  arrange(desc(n))
write.csv(ProPerTrainee4, file = "ProPerTrainee4.csv")
#################### Create PGY4 datasets: for each triple of procedures, keep the trainees who have a paired rating on all three procedures
###setwd("C:/XXX")
# Candidate procedures: the first 33 rows of ProPerTrainee4, i.e. the
# procedures covered by the largest number of PGY4 trainees.
a <- ProPerTrainee4$procID[1:33]
b <- a
c <- a
n <- 0

# Every index triple i < j < k, one per column, in the same lexicographic
# order in which three nested loops restricted to i < j < k visit them.
triples <- combn(length(a), 3)

for (idx in seq_len(ncol(triples))) {
  i <- triples[1, idx]
  j <- triples[2, idx]
  k <- triples[3, idx]

  # First paired rating per (trainee, procedure) for the three selected
  # procedures, restricted to trainees who have all three of them.
  dat <- PGY4 %>%
    filter(procID == a[i] | procID == b[j] | procID == c[k]) %>%
    distinct(subjectID, procID, .keep_all = TRUE) %>%
    group_by(subjectID) %>%
    filter(n() == 3)

  # A triple is only exported when it yields at least 150 rows
  # (3 rows per trainee, so at least 50 trainees).
  if (nrow(dat) < 150) {
    next
  }

  # subset 1: faculty rating (.x), item 1 = supervision
  dat1 <- dat %>%
    filter(raterRole_r.x == "1") %>%
    select(
      subjectID,
      rater = raterRole_r.x,
      procID,
      score = supervision_r.x,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "1") %>%
    mutate(item = "1")

  # subset 2: faculty rating (.x), item 2 = performance
  dat2 <- dat %>%
    filter(raterRole_r.x == "1") %>%
    select(
      subjectID,
      rater = raterRole_r.x,
      procID,
      score = performance_r.x,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "2") %>%
    mutate(item = "2")

  # subset 3: trainee rating (.y), item 1 = supervision
  dat3 <- dat %>%
    filter(raterRole_r.y == "2") %>%
    select(
      subjectID,
      rater = raterRole_r.y,
      procID,
      score = supervision_r.y,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "3") %>%
    mutate(item = "1")

  # subset 4: trainee rating (.y), item 2 = performance
  dat4 <- dat %>%
    filter(raterRole_r.y == "2") %>%
    select(
      subjectID,
      rater = raterRole_r.y,
      procID,
      score = performance_r.y,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "4") %>%
    mutate(item = "2")

  # Files are numbered consecutively in the order qualifying triples occur.
  n <- n + 1
  write.csv(rbind(dat1, dat2, dat3, dat4), paste0("PGY4_", n, ".csv"))
}


#######################  For the PGY 5   ################################
# Paired ratings whose faculty-side record has traineePGY equal to 5.
PGY5 <- filter(gdata_wide, traineePGY.x == "5")

### number of distinct procedures present at this PGY level
n_distinct(PGY5$procID)

### for each procedure, count the distinct trainees (subjectID) who have at
### least one paired rating on it; most widely covered procedure first
ProPerTrainee5 <- PGY5 %>%
  group_by(procID) %>%
  dplyr::summarise(n = n_distinct(subjectID), .groups = "drop") %>%
  arrange(desc(n))

write.csv(ProPerTrainee5, file = "ProPerTrainee5.csv")
#################### Create PGY5 datasets: for each triple of procedures, keep the trainees who have a paired rating on all three procedures
###setwd("C:/XXX")
# Candidate procedures: the first 37 rows of ProPerTrainee5, i.e. the
# procedures covered by the largest number of PGY5 trainees.
a <- ProPerTrainee5$procID[1:37]
b <- a
c <- a
n <- 0

# Every index triple i < j < k, one per column, in the same lexicographic
# order in which three nested loops restricted to i < j < k visit them.
triples <- combn(length(a), 3)

for (idx in seq_len(ncol(triples))) {
  i <- triples[1, idx]
  j <- triples[2, idx]
  k <- triples[3, idx]

  # First paired rating per (trainee, procedure) for the three selected
  # procedures, restricted to trainees who have all three of them.
  dat <- PGY5 %>%
    filter(procID == a[i] | procID == b[j] | procID == c[k]) %>%
    distinct(subjectID, procID, .keep_all = TRUE) %>%
    group_by(subjectID) %>%
    filter(n() == 3)

  # A triple is only exported when it yields at least 150 rows
  # (3 rows per trainee, so at least 50 trainees).
  if (nrow(dat) < 150) {
    next
  }

  # subset 1: faculty rating (.x), item 1 = supervision
  dat1 <- dat %>%
    filter(raterRole_r.x == "1") %>%
    select(
      subjectID,
      rater = raterRole_r.x,
      procID,
      score = supervision_r.x,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "1") %>%
    mutate(item = "1")

  # subset 2: faculty rating (.x), item 2 = performance
  dat2 <- dat %>%
    filter(raterRole_r.x == "1") %>%
    select(
      subjectID,
      rater = raterRole_r.x,
      procID,
      score = performance_r.x,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "2") %>%
    mutate(item = "2")

  # subset 3: trainee rating (.y), item 1 = supervision
  dat3 <- dat %>%
    filter(raterRole_r.y == "2") %>%
    select(
      subjectID,
      rater = raterRole_r.y,
      procID,
      score = supervision_r.y,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "3") %>%
    mutate(item = "1")

  # subset 4: trainee rating (.y), item 2 = performance
  dat4 <- dat %>%
    filter(raterRole_r.y == "2") %>%
    select(
      subjectID,
      rater = raterRole_r.y,
      procID,
      score = performance_r.y,
      PGY = traineePGY.x
    ) %>%
    mutate(subset = "4") %>%
    mutate(item = "2")

  # Files are numbered consecutively in the order qualifying triples occur.
  n <- n + 1
  write.csv(rbind(dat1, dat2, dat3, dat4), paste0("PGY5_", n, ".csv"))
}






